-
Notifications
You must be signed in to change notification settings - Fork 20
Add audio translation task type and provider #335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
julien-nc
commented
Feb 4, 2026
- New audio2audio:translate task type (will be created in server soon)
- Audio translation provider
- Factorize translation logic in a service
- Use correct user language for IL10N text translations happening in the task
f3a6a09 to
0c8d7be
Compare
…, factorize translation logic in a service, use the correct user language for text translations happening in the task. Signed-off-by: Julien Veyssier <julien-nc@posteo.net>
0c8d7be to
2e45e17
Compare
| 'text_output' => new ShapeDescriptor( | ||
| $this->l->t('Text output'), | ||
| $this->l->t('The text translation'), | ||
| EShapeType::Text, | ||
| ), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe the transcribed text of the audio input could be sent back too, since it's already computed.
| public function getOptionalInputShape(): array { | ||
| return []; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you think of adding the optional voice and speed params here too, same as in the text-to-speech task?
integration_openai/lib/TaskProcessing/TextToSpeechProvider.php
Lines 64 to 84 in dfd4567
| public function getOptionalInputShape(): array { | |
| return [ | |
| 'voice' => new ShapeDescriptor( | |
| $this->l->t('Voice'), | |
| $this->l->t('The voice to use'), | |
| EShapeType::Enum | |
| ), | |
| 'model' => new ShapeDescriptor( | |
| $this->l->t('Model'), | |
| $this->l->t('The model used to generate the speech'), | |
| EShapeType::Enum | |
| ), | |
| 'speed' => new ShapeDescriptor( | |
| $this->l->t('Speed'), | |
| $this->openAiAPIService->isUsingOpenAi(Application::SERVICE_TYPE_TTS) | |
| ? $this->l->t('Speech speed modifier (Valid values: 0.25-4)') | |
| : $this->l->t('Speech speed modifier'), | |
| EShapeType::Number | |
| ) | |
| ]; | |
| } |
| $this->logger->warning('Text to speech generation failed: no speech returned'); | ||
| throw new ProcessingException('Text to speech generation failed: no speech returned'); | ||
| } | ||
| $translatedAudio = $includeWatermark ? $this->watermarkingService->markAudio($apiResponse['body']) : $apiResponse['body']; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may be better to watermark the transcript of the input audio, so that the translated text and the translated audio both have the watermark in the target language.
| if ($includeWatermark) { | ||
| if ($userId !== null) { | ||
| $user = $this->userManager->getExistingUser($userId); | ||
| $lang = $this->l10nFactory->getUserLanguage($user); | ||
| $l = $this->l10nFactory->get(Application::APP_ID, $lang); | ||
| $ttsPrompt .= "\n\n" . $l->t('This was generated using Artificial Intelligence.'); | ||
| } else { | ||
| $ttsPrompt .= "\n\n" . $this->l->t('This was generated using Artificial Intelligence.'); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can also work, but it would add the watermark text/audio in the user's language, which may or may not be the target language.
| $context->registerTaskProcessingTaskType(AudioToAudioTranslateTaskType::class); | ||
| $context->registerTaskProcessingProvider(AudioToAudioTranslateProvider::class); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
checks for STT and TTS providers would be nice too